This research is inspired by a simple, yet profound question: Does a lack of working street lights make neighborhoods less safe?
For years, city planners have assumed that more light means less crime. However, the history of public lighting is complicated.We recognize that past research (Yared, 2021) shows how street lamps have been used in powerful, and sometimes negative, ways to control people. This reminds us that light is not just a utility; it’s a critical tool tied to social equity and surveillance.
With this context, our project tackles a specific and urgent issue: we are using modern crime data to measure the direct link between failing infrastructure (street lights that are out) and serious violence (shooting incidents) in Manhattan neighborhoods.
By looking at crime that happens during the day compared to crime that happens at night, our goal is to find clear, data-driven answers that can help city officials decide where to spend money to fix lights and how to make every block safer.
Note: The street lamp data used for this analysis is restricted to the years 2020 through 2025 due to data storage and management requirements.
This map visualizes the total number of shooting incidents that occurred and total number of “street light out” service requests reported within each Manhattan census tract in 2025.
<<<<<<< HEAD ======= >>>>>>> b5e91f5ab996c284e732c8789e2f1bd2c4b7f3a1This map visualizes the total number of shooting incidents that occurred and total number of “street light out” service requests reported within each Manhattan census tract over the entire study period.
<<<<<<< HEAD ======= >>>>>>> b5e91f5ab996c284e732c8789e2f1bd2c4b7f3a1This part models the association between the count of street lights out and the count of shooting incidents per Manhattan census tract using two methods: Poisson Regression (cross-sectional, 2025 only) and Generalized Estimating Equations (GEE) (longitudinal, 2020-2025).
This analysis uses standard Poisson Regression to estimate the association between the number of street lights out and the number of shooting cases in each census tract for the year 2025.
# Calculate 2025 Shooting Counts Per Tract
shootings_per_tract_25 <-
st_join(manhattan_pop, shooting25_sf) |>
group_by(GEOID) |>
summarize(
N_SHOOTINGS_2025 = sum(!is.na(incident_key)),
.groups = 'drop'
) |>
st_drop_geometry()
# Prepare final 2025 dataset
final_cross_section_2025 <-
manhattan_pop |>
left_join(shootings_per_tract_25, by = "GEOID") |>
left_join(lights_per_tract_25, by = "GEOID") |>
mutate(
N_BROKEN_LIGHTS_2025 = replace_na(N_BROKEN_LIGHTS_2025, 0),
N_SHOOTINGS_2025 = replace_na(N_SHOOTINGS_2025, 0)
) |>
st_drop_geometry()
# --- POISSON REGRESSION MODEL ---
poisson_model_2025 <- glm(
N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025,
family = poisson,
data = final_cross_section_2025
)
# 1. Print the Model Summary
summary(poisson_model_2025)
##
## Call:
## glm(formula = N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025, family = poisson,
## data = final_cross_section_2025)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.91705 0.19716 -9.723 < 2e-16 ***
## N_BROKEN_LIGHTS_2025 0.04535 0.01503 3.018 0.00254 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 252.75 on 309 degrees of freedom
## Residual deviance: 244.93 on 308 degrees of freedom
## AIC: 365.33
##
## Number of Fisher Scoring iterations: 6
# 2. Calculate Incidence Rate Ratios (IRR)
exp(coef(poisson_model_2025))
## (Intercept) N_BROKEN_LIGHTS_2025
## 0.1470408 1.0463913
This analysis uses a Generalized Estimating Equation (GEE) Poisson Model to assess the association over multiple years (2020-2025).
all_geoids <- manhattan_pop |> st_drop_geometry() |> dplyr::select(GEOID)
all_years <- tibble(created_year = 2020:2025)
base_grid <- all_geoids |>
cross_join(all_years) |>
mutate(GEOID = as.character(GEOID))
# --- CALCULATE ANNUAL SHOOTING COUNTS (2020-2025) ---
shooting20_25_df_filtered <- merged_shooting_df |>
filter(created_year >= 2020 & created_year <= 2025) |>
drop_na(latitude, longitude) # Ensure no NA coords here
shooting20_25_sf <- st_as_sf(
shooting20_25_df_filtered,
coords = c("longitude", "latitude"),
crs = 4326
)
shootings_per_year <- st_join(manhattan_pop, shooting20_25_sf) |>
st_drop_geometry() |>
filter(!is.na(incident_key)) |>
group_by(GEOID, created_year) |>
summarize(
N_SHOOTINGS = n(),
.groups = 'drop'
) |>
mutate(GEOID = as.character(GEOID))
# --- CALCULATE ANNUAL BROKEN LIGHT COUNTS (2020-2025) ---
# Street light data (street11_25_light_sf_hist) prepared in Part 1/Step 6
lights_per_year <- st_join(manhattan_pop, street11_25_light_sf_hist) |>
st_drop_geometry() |>
filter(!is.na(descriptor)) |>
group_by(GEOID, created_year) |>
summarize(
N_BROKEN_LIGHTS = n(),
.groups = 'drop'
) |>
mutate(GEOID = as.character(GEOID))
# --- MERGE AND CLEAN LONGITUDINAL DATA ---
final_longitudinal_data <- base_grid |>
left_join(shootings_per_year, by = c("GEOID", "created_year")) |>
left_join(lights_per_year, by = c("GEOID", "created_year")) |>
mutate(
N_SHOOTINGS = replace_na(N_SHOOTINGS, 0),
N_BROKEN_LIGHTS = replace_na(N_BROKEN_LIGHTS, 0)
)
# --- GENERALIZED ESTIMATING EQUATION (GEE) MODEL ---
poisson_gee_model <- geepack::geeglm(
N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year,
data = final_longitudinal_data,
id = GEOID,
family = poisson,
corstr = "unstructured"
)
# Output Model Summary and Incidence Rate Ratios
summary(poisson_gee_model)
##
## Call:
## geepack::geeglm(formula = N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year,
## family = poisson, data = final_longitudinal_data, id = GEOID,
## corstr = "unstructured")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 448.829995 56.799488 62.442 2.78e-15 ***
## N_BROKEN_LIGHTS 0.012353 0.004656 7.039 0.00798 **
## created_year -0.222397 0.028100 62.639 2.44e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = unstructured
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 2.459 0.2596
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha.1:2 0.7595 0.09163
## alpha.1:3 0.4816 0.08234
## alpha.1:4 0.4816 0.07975
## alpha.1:5 0.5573 0.08891
## alpha.1:6 0.2690 0.06500
## alpha.2:3 0.6232 0.09262
## alpha.2:4 0.6866 0.08908
## alpha.2:5 0.6844 0.10104
## alpha.2:6 0.3365 0.08126
## alpha.3:4 0.4244 0.06458
## alpha.3:5 0.5023 0.09885
## alpha.3:6 0.2154 0.05049
## alpha.4:5 0.4427 0.05812
## alpha.4:6 0.2284 0.06247
## alpha.5:6 0.2165 0.05758
## Number of clusters: 310 Maximum cluster size: 6
exp(coef(poisson_gee_model))
## (Intercept) N_BROKEN_LIGHTS created_year
## 8.402e+194 1.012e+00 8.006e-01
This section isolates shooting incidents into two distinct groups based on the time of day—DAY (Daytime) and NIGHT (Nighttime)—using the precise sunrise/sunset calculations performed in the preliminary data preparation. We then repeat the cross-sectional and longitudinal analyses for each group to see if the association with street lights out differs based on natural light conditions.
# Filter for 2025 day incidents
shooting25_sf_DAY <- shooting_data_DAY |> filter(created_year == 2025) |>
st_as_sf(coords = c("longitude", "latitude"), crs = 4326)
# Calculate DAYTIME Counts Per Tract (2025)
shootings_per_tract_25_DAY <- st_join(manhattan_pop, shooting25_sf_DAY) |>
group_by(GEOID) |>
summarize(N_SHOOTINGS_2025 = sum(!is.na(incident_key)), .groups = 'drop') |>
st_drop_geometry()
final_cross_section_2025_DAY <- manhattan_pop |>
left_join(shootings_per_tract_25_DAY, by = "GEOID") |>
left_join(lights_per_tract_25, by = "GEOID") |>
mutate(
N_BROKEN_LIGHTS_2025 = replace_na(N_BROKEN_LIGHTS_2025, 0),
N_SHOOTINGS_2025 = replace_na(N_SHOOTINGS_2025, 0)
) |>
st_drop_geometry()
poisson_model_2025_DAY <- glm(
N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025,
family = poisson,
data = final_cross_section_2025_DAY
)
summary(poisson_model_2025_DAY)
print(exp(coef(poisson_model_2025_DAY)))
# Filter for 2025 night incidents
shooting25_sf_NIGHT <- shooting_data_NIGHT |> filter(created_year == 2025) |>
st_as_sf(coords = c("longitude", "latitude"), crs = 4326)
# Calculate NIGHTTIME Counts Per Tract (2025)
shootings_per_tract_25_NIGHT <- st_join(manhattan_pop, shooting25_sf_NIGHT) |>
group_by(GEOID) |>
summarize(N_SHOOTINGS_2025 = sum(!is.na(incident_key)), .groups = 'drop') |>
st_drop_geometry()
final_cross_section_2025_NIGHT <- manhattan_pop |>
left_join(shootings_per_tract_25_NIGHT, by = "GEOID") |>
left_join(lights_per_tract_25, by = "GEOID") |>
mutate(
N_BROKEN_LIGHTS_2025 = replace_na(N_BROKEN_LIGHTS_2025, 0),
N_SHOOTINGS_2025 = replace_na(N_SHOOTINGS_2025, 0)
) |>
st_drop_geometry()
poisson_model_2025_NIGHT <- glm(
N_SHOOTINGS_2025 ~ N_BROKEN_LIGHTS_2025,
family = poisson,
data = final_cross_section_2025_NIGHT
)
summary(poisson_model_2025_NIGHT)
print(exp(coef(poisson_model_2025_NIGHT)))
| Time Period | IRR (Incidence Rate Ratios) | P-value |
|---|---|---|
| Daytime | 1.050 | 0.0340 |
| Nighttime | 1.044 | 0.0309 |
# --- DAYTIME Shooting Counts (2020-2025) ---
shooting_sf_DAY <- st_as_sf(
shooting_data_DAY |> drop_na(longitude, latitude), # ADD drop_na() here
coords = c("longitude", "latitude"),
crs = 4326
)
shootings_per_year_DAY <- st_join(manhattan_pop, shooting_sf_DAY) |>
st_drop_geometry() |>
filter(!is.na(incident_key)) |>
group_by(GEOID, created_year) |>
summarize(N_SHOOTINGS = n(), .groups = 'drop') |>
mutate(GEOID = as.character(GEOID))
# --- DAYTIME Merged Data ---
final_longitudinal_data_DAY <- base_grid |>
left_join(shootings_per_year_DAY, by = c("GEOID", "created_year")) |>
left_join(lights_per_year, by = c("GEOID", "created_year")) |>
mutate(
N_SHOOTINGS = replace_na(N_SHOOTINGS, 0),
N_BROKEN_LIGHTS = replace_na(N_BROKEN_LIGHTS, 0)
)
poisson_gee_model_DAY <- geepack::geeglm(
N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year,
data = final_longitudinal_data_DAY,
id = GEOID,
family = poisson,
corstr = "unstructured"
)
summary(poisson_gee_model_DAY)
print(exp(coef(poisson_gee_model_DAY)))
# --- NIGHTTIME Shooting Counts (2020-2025) ---
shooting_sf_NIGHT <- st_as_sf(
shooting_data_NIGHT |> drop_na(longitude, latitude), # ADD drop_na() here
coords = c("longitude", "latitude"),
crs = 4326
)
shootings_per_year_NIGHT <- st_join(manhattan_pop, shooting_sf_NIGHT) |>
st_drop_geometry() |>
filter(!is.na(incident_key)) |>
group_by(GEOID, created_year) |>
summarize(N_SHOOTINGS = n(), .groups = 'drop') |>
mutate(GEOID = as.character(GEOID))
# --- NIGHTTIME Merged Data ---
final_longitudinal_data_NIGHT <- base_grid |>
left_join(shootings_per_year_NIGHT, by = c("GEOID", "created_year")) |>
left_join(lights_per_year, by = c("GEOID", "created_year")) |>
mutate(
N_SHOOTINGS = replace_na(N_SHOOTINGS, 0),
N_BROKEN_LIGHTS = replace_na(N_BROKEN_LIGHTS, 0)
)
poisson_gee_model_NIGHT <- geepack::geeglm(
N_SHOOTINGS ~ N_BROKEN_LIGHTS + created_year,
data = final_longitudinal_data_NIGHT,
id = GEOID,
family = poisson,
corstr = "unstructured"
)
summary(poisson_gee_model_NIGHT)
print(exp(coef(poisson_gee_model_NIGHT)))
| Time Period | IRR (Incidence Rate Ratios) | P-value (Wald Test) |
|---|---|---|
| Daylight | 1.01 | 0.008 |
| Nighttime | 1.02 | 0.000 |